####################################################################################

#What You See and What You Get
#This file conducts the analysis for W1 survey
#Creates the analyses used for Table and Figures in the main paper and appendix
#Conducts the analyses for data cited in the paper/appendix


####################################################################################

rm(list=ls())
library(AER)
library(tidyverse)
library(readxl)
library(lubridate)
library(sandwich)
library(xtable)
library(corrgram)
library(forcats)
library(estimatr)
library(fastDummies)
library(randomizr)
library(ANOVAreplication)
library(miceadds)
library(clubSandwich)
library(texreg)
options(scipen=999)

source("Code/functions.R")

####################################################################################
# Read W1 dataset (and perform changes)                                       ######
####################################################################################

load("Data/surveyW1-WhatYouSee.Rda")
print(comment(survey))

# Declare D (compliance)
survey$D <- survey$ganhou_r
survey$D_composite <- survey$ganhou_composite

# Create index of incumbent indices (unplanned)
# Following the same structure for the individual indices

set.seed(1977)

#PT
pt_incumbent_index_vars <- imp.mice(survey %>% dplyr::select(lula_eval, dilma_eval, party_pt_r)) 
survey <- survey %>% bind_cols(
  pt_incumbent_index = calculate_mean_effects_index(Z = survey$Z
                                           , outcome_mat = pt_incumbent_index_vars, greedy = TRUE))

#Incumbent at Treat
incumbent_treat_index_vars <- imp.mice(survey %>% dplyr::select(dilma_eval, temer_eval, paes_eval, 
                                                         pezao_eval))
survey <- survey %>% bind_cols(
  treat_incumbent_index = calculate_mean_effects_index(Z = survey$Z
                                          , outcome_mat = incumbent_treat_index_vars , greedy = TRUE))


#National Incumbent
national_incumbent_index_vars <- imp.mice(survey %>% dplyr::select(lula_eval, dilma_eval, temer_eval)) 
survey <- survey %>% bind_cols(
  national_incumbent_index = calculate_mean_effects_index(Z = survey$Z
                        , outcome_mat = national_incumbent_index_vars, greedy = TRUE ))


#Local Incumbent
local_incumbent_index_vars <- imp.mice(survey %>% dplyr::select(paes_eval, pezao_eval, voteinc_2016_d)) 
survey <- survey %>% bind_cols(
  local_incumbent_index = calculate_mean_effects_index(Z = survey$Z
                          , outcome_mat = local_incumbent_index_vars, greedy = TRUE ))


surveyw1 <- survey
rm(survey)

####################################################################################
# Declare variables for W1 (controls and outcomes)                            ######
####################################################################################

# Declare labels for control and balance variables
pre.treat <-  c(
    "sexor", 
    "race_bin",
    "religiao_dummy",
    "criancas_moram",
    "sch_years",
    "cadunico",
    "formal",
    "av_formalinc_log")
  
bal.labels <- c(
    "Sex (male)",
    "Race (white)", 
    "Religion (any)",
    "Children (N)",
    "Schooling (years)",
    "Cadunico",
    "Years in Formal Employment",
    "Avg. Formal Wages")

####################################################################################
# Declare outcomes for analysis                                               ######
####################################################################################
outcomes1 <- c("pt_incumbent_index", "treat_incumbent_index")
outcomes2 <- c("resp_lula_dilma_pt", "resp_paes_pezao_temer_mdb")  
outcomes3 <- c("lula_eval", "dilma_eval","party_pt_r", "paes_eval", "pezao_eval", "temer_eval")
outcomes4 <- c("voteinc_2016_d")

#These definitions are needed below, in different parts of the code
the.fam <- paste("outcomes",1:4,sep="")
outcomes <- do.call("c",sapply(paste("outcomes",1:4,sep=""),"get"))
out.class <- paste("H",1:4,sep="")
names(outcomes)<-NULL
out.selected <- c(1,2) #select the main outcomes  
the.labels <-  c("PT Evalulation Index", "Incumbent at Treatment \n Evaluation Index",
                 "Attribution to Lula/Dilma/PT", 
                 "Attribution to Paes/Pezao/Temer/MDB",
                 "Lula Evaluation", "Dilma Evaluation",
                 "Workers' Party ID", 
                 "Paes Evaluation", "Pezão Evaluation", "Temer Evaluation", "Vote Pedro Paulo 2016")

# Check whether labels are correct
main <- rep(0,length(outcomes))
main[out.selected] <- 1
var.labels<- data.frame(outcomes,the.labels,main)
print(var.labels)
save(var.labels,file="Routputs/out-W1-variablelabels.RData")

# Print summary statistics 
sumtab <- summary_stats(data=subset(surveyw1,select=c("Z","D","D_composite", "idade",
                                                      pre.treat,outcomes)))
print(round(sumtab,2))
xsumtab <- xtable(sumtab,digits=c(0,0,2,2,2,2,2,0))
label(xsumtab) <- paste("tab:summary",sep="")
caption(xsumtab) <- paste("Descriptive Statistics",sep="")
print(xsumtab,
      type="latex",
      hline.after=1,
      caption.placement="top",
      file="Tables/tab-a2.tex")

#####################################################################################
###### Prior to rescaling
##### Descriptive information

control <- surveyw1 %>% filter(Z == 0)

par(mfrow=c(2,2))
barplot(table(control$lula_eval), xlab = "Lula Eval.")
barplot(table(control$dilma_eval), xlab = "Dilma Eval.")
barplot(table(control$paes_eval), xlab = "Paes Eval.")
barplot(table(control$pezao_eval), xlab = "Pezao Eval.")

summary(control$lula_eval)
summary(control$paes_eval)
summary(control$dilma_eval)
summary(control$pezao_eval)

ks.test(control$paes_eval, control$dilma_eval)
ks.test(control$paes_eval, control$lula_eval)
ks.test(control$paes_eval, control$pezao_eval)

par(mfrow=c(1,1))
boxplot(control$pt_incumbent_index, xlab = "PT incumbent Index")


######################### Simulation for Political Magnitude of Effect

control <- surveyw1 %>% filter(Z == 0)
treat <- surveyw1 %>% filter(Z == 1)
control_pure <- control %>% filter(!fieldID %in% treat$fieldID)

surveyw1 %>% group_by(Z) %>% summarise(mean_e = mean(paes_eval, na.rm = T), 
                                       sd_e = sd(paes_eval, na.rm = T))

sd(surveyw1[which(surveyw1$Z==0),]$paes_eval, na.rm = T)
#1.150089
#1.150089*0.19679-> 0.226326
#0.19 comes from out.all$out.itt.lm$out.lmc 


winners <- 2911
winners.eval <- control$paes_eval
# inpute missing evals with draws from the actual empirical distribution of evals
set.seed(0708)
n.na <- sum(is.na(control$paes_eval)) #number of NAs in paes_eval in the control group
winners.eval[is.na(winners.eval)] <- sample(na.omit(control$paes_eval),n.na)
tau <- 0.23
tally_t <- NA
tally_c <- NA
margin <- NA

set.seed(0708)
for (i in 1:1000){
  Y0 <- rbinom(2911,1,prob=plogis(winners.eval,scale=1))
  Y1 <- rbinom(2911,1,prob=plogis(winners.eval+0.23,scale=1))
  tally_t[i] <- sum(Y1)
  tally_c[i] <- sum(Y0)
  margin[i] <- tally_t[i] - tally_c[i]
}

summary(margin) #130
quantile(margin, probs = c(0.05, 0.5, 0.95))
hist(margin)

#alternative scale
winners <- 2911
winners.eval <- control$paes_eval
# inpute missing evals with draws from the actual empirical distribution of evals
set.seed(0708)
n.na <- sum(is.na(control$paes_eval)) #number of NAs in paes_eval in the control group
winners.eval[is.na(winners.eval)] <- sample(na.omit(control$paes_eval),n.na)
tau <- 0.23
tally_t <- NA
tally_c <- NA
margin <- NA

set.seed(0708)
for (i in 1:1000){
  Y0 <- rbinom(2911,1,prob=plogis(winners.eval,scale=0.5))
  Y1 <- rbinom(2911,1,prob=plogis(winners.eval+0.23,scale=0.5))
  tally_t[i] <- sum(Y1)
  tally_c[i] <- sum(Y0)
  margin[i] <- tally_t[i] - tally_c[i]
}

summary(margin) #176
quantile(margin, probs = c(0.05, 0.5, 0.95))
hist(margin)


#########################Re-scaling outcomes

sumtab <- summary_stats(data=subset(surveyw1,select=c("Z","D","D_composite", "idade",
                                                      pre.treat,outcomes)))
print(round(sumtab,2))
xsumtab <- xtable(sumtab, digits=c(0,0,2,2,2,2,2,0))
label(xsumtab) <- paste("tab:summary",sep="")
caption(xsumtab) <- paste("Descriptive Statistics (Standardized)",sep="")
print(xsumtab,
      type="latex",
      hline.after=1,
      caption.placement="top",
      file="Tables/tab-W1-descriptives-raw.tex")

#Re-scaling outcomes
surveyw1.unscaled <- surveyw1
surveyw1[outcomes] <- scalev(outcomes=outcomes,the.data=surveyw1)

#Standardized Descriptive Table
sumtab <- summary_stats(data=subset(surveyw1,select=c("Z","D","D_composite", "idade",
                                                      pre.treat,outcomes)))
print(round(sumtab,2))
xsumtab <- xtable(sumtab, digits=c(0,0,2,2,2,2,2,0))
label(xsumtab) <- paste("tab:summary",sep="")
caption(xsumtab) <- paste("Descriptive Statistics (Standardized)",sep="")
print(xsumtab,
      type="latex",
      hline.after=1,
      caption.placement="top",
      file="Tables/tab-W1-descriptives-std.tex")


####################################################################################
# Balance by Lottery                                                          ######
####################################################################################
the.lotteries <- unique(surveyw1$id_edital)

# Loop over lotteries  #
bal.stats <- list() #object to store balance results
for(ii in 1:length(the.lotteries)){ 
  print(the.lotteries[ii])
  lot <- subset(surveyw1, id_edital==the.lotteries[ii])
  #Implments an F-test
  the.formula <- formula(paste("Z~idade+",paste(pre.treat,collapse="+")))
  m1 <- lm_robust(the.formula, 
                  data = lot, 
                  weights = 1/sampling_prob,
                  se_type = "stata")
  m0 <- lm_robust(Z~idade, 
                  data = lot, 
                  weights = 1/sampling_prob,
                  se_type = "stata")
  #Standard F-test 
  the.wald <- lmtest::waldtest(m1, m0,  test = c("F"))
  print(the.wald)
  
  # Compute the p-value of the F-tests based on the permutation distribution of W
  W_obs <- lmtest::waldtest(m1, m0,  test = c("F"))[2,3] #store f-test for bootstrapped p-values 
  N <- nrow(lot)
  sims <- 1000
  n_treat <- as.numeric(table(lot$Z)[2])
  W_sims <- numeric(sims)
  for(i in 1:sims){
    lot$Z_sim <- complete_ra(N, n_treat)
    fit_sim1 <- lm_robust(formula(paste("Z_sim~idade+",paste(pre.treat,collapse="+"))), 
                          data = lot, weights = 1/sampling_prob, 
                          se_type = "stata")
    fit_sim0 <- lm_robust(Z_sim ~ idade, data = lot, weights = 1/sampling_prob,  
                          se_type = "stata")
    W_sims[i] <- lmtest::waldtest(fit_sim1, fit_sim0,  test = c("F"))[2,3]
  }
  p.permutation <- mean(W_sims >= W_obs)
  # Bootrsapped p-value o F-test
  cat("Simulated p-value:",p.permutation,"\n")   
  
  # Compute balance variable by variable controlling for age
  ballm <- bind_rows(lapply(1:length(pre.treat), outcomes=c(pre.treat)
                            , ctrls = "idade"
                            , FUN = est_lin, the.data=lot
                            , w = 1/lot$sampling_prob)) 
  rownames(ballm) <- pre.treat
  print(ballm)

  
  # Compute balance variable by variable controlling for age
  ballmnoage <- bind_rows(lapply(1:length(pre.treat), outcomes=c(pre.treat)
                                 , FUN = est_lm, the.data=lot
                                 , w = 1/lot$sampling_prob)) 
  rownames(ballmnoage) <- pre.treat
  print(ballmnoage)
  
#assemble objects with balance stats
bal.stats[[the.lotteries[ii]]] <- list(N=table(lot$Z),wald=the.wald
                                         ,p.permutation=p.permutation
                                         ,balage=ballm
                                         ,balnoage=ballmnoage)
} #end loop for lottery

save(bal.stats,file="Routputs/out-W1-balancebylottery.RData")

####################################################################################
# Balance Pooled                                                         ###########
####################################################################################

pol.bal.stats <- list() #object to store balance results
#Implements an F-test
the.formula <- formula(paste("Z~idade+",paste(pre.treat,collapse="+")))
m1 <- lm_robust(the.formula, 
                data = surveyw1, clusters = fieldID,
                fixed_effects =~id_edital, 
                weights = 1/sampling_prob,
                se_type = "stata")
m0 <- lm_robust(Z~idade, 
                data = surveyw1, clusters = fieldID,
                fixed_effects =~id_edital,
                weights = 1/sampling_prob,
                se_type = "stata")
#Standard F-test 
ftest <-  lmtest::waldtest(m1, m0,  test = c("F"))
print(xtable(ftest))

# Compute the p-value of the F-tests based on the permutation distribution of W
W_obs <- lmtest::waldtest(m1, m0,  test = c("F"))[2,3] #store f-test for bootstrapped p-values 
N <- nrow(surveyw1)
sims <- 1000
n_treat <- as.numeric(table(surveyw1$Z)[2])
W_sims <- numeric(sims)
for(i in 1:sims){
  surveyw1$Z_sim <- complete_ra(N, n_treat)
  fit_sim1 <- lm_robust(formula(paste("Z_sim~idade+",paste(pre.treat,collapse="+"))), 
                        data = surveyw1,
                        cluster = fieldID, 
                        fixed_effects =~id_edital,
                        se_type = "stata",
                        weights = 1/sampling_prob)
  fit_sim0 <- lm_robust(Z_sim ~ idade, data = surveyw1, weights = 1/sampling_prob, 
                        fixed_effects =~id_edital,
                        se_type = "stata")
  W_sims[i] <- lmtest::waldtest(fit_sim1, fit_sim0,  test = c("F"))[2,3]
}
p.permutation <- mean(W_sims >= W_obs)
# Bootrsapped p-value o F-test
cat("Simulated p-value:",p.permutation,"\n")

# Compute balance variable by variable controlling for age
ballm <- bind_rows(lapply(1:length(pre.treat), outcomes=pre.treat
                          , FUN = est_lin
                          , ctrls = c("idade","id_edital")
                          , clusters = "fieldID"
                          , w=1/surveyw1$sampling_prob
                          , the.data=surveyw1))
rownames(ballm) <- pre.treat
print(xtable(ballm))

# Compute balance variable by variable controlling without controlling for age  
ballmnoage <- bind_rows(lapply(1:length(pre.treat), outcomes=pre.treat
                               , FUN = est_lin
                               , ctrls = c("id_edital")
                               , clusters = "fieldID"
                               , w=1/surveyw1$sampling_prob
                               , the.data=surveyw1))
rownames(ballmnoage) <- pre.treat
print(ballmnoage)

#assemble objects with balance stats
pol.bal.stats <- list(N=table(surveyw1$Z),wald=ftest, 
                      p.permutation=p.permutation,
                      balage=ballm,
                      balnoage=ballmnoage)
save(pol.bal.stats,file="Routputs/out-W1-balancepooled.RData")

####################################################################################
# Attrition Pooled                                                      ############
####################################################################################

#Data for attrition analysis
load("Data/W1_attrition_overall_public.Rda")
load("Data/W1_attrition_public.Rda")
print(comment(W1_attrition_overall))
print(comment(W1_attrition))

### Answered by treatment and control 

attrition_overall <- lm_lin(interviewed ~ treat_ind, cluster = fieldID,
                            se_type = "stata", covariates = ~ id_edital,
                            weights = 1/sampling_prob,
                            data = W1_attrition_overall)

attrition_overallr <- lm_robust(interviewed ~ treat_ind, cluster = fieldID,
                                se_type = "stata", fixed_effects = ~ id_edital,
                                weights = 1/sampling_prob,
                                data = W1_attrition_overall)

### Answered by treatment and control (among those who answered)

attrition_answered <- lm_lin(interviewed ~ treat_ind, cluster = fieldID,
                             se_type = "stata", covariates = ~ id_edital,
                             weights = 1/sampling_prob,
                             data = W1_attrition)

attrition_answeredr <- lm_robust(interviewed ~ treat_ind, cluster = fieldID,
                                 se_type = "stata", fixed_effects = ~ id_edital,
                                 weights = 1/sampling_prob,
                                 data = W1_attrition)


#assemble objects with attrition stats
pol.attrition.stats <- list(attrition_overall, 
                      attrition_overallr, 
                      attrition_answered, 
                      attrition_answeredr)

save(pol.attrition.stats,file="Routputs/out-W1-attrition.RData")

##### Remove missing for Wald Test
W1_attrition_overall2 <- W1_attrition_overall %>% filter(!is.na(sexor))
W1_attrition2 <- W1_attrition %>% filter(!is.na(sexor))

#Including interactions of covariates
attrition_overall_covi <- lm_robust(interviewed ~ treat_ind + formal_pre_prop + sexor + av_prel + 
                                      treat_ind*formal_pre_prop + treat_ind*sexor + treat_ind*av_prel, 
                                    se_type = "stata", fixed_effects = ~ id_edital, clusters = fieldID,
                                    data = W1_attrition_overall2)
attrition_answered_covi <- lm_robust(interviewed ~ treat_ind + formal_pre_prop + sexor + av_prel + 
                                       treat_ind*formal_pre_prop + treat_ind*sexor + treat_ind*av_prel, 
                                     se_type = "stata", fixed_effects = ~ id_edital, clusters = fieldID,
                                     data = W1_attrition2)
### No interactions

attrition_overall_cov <- lm_robust(interviewed ~ treat_ind + formal_pre_prop + sexor + av_prel,
                                   se_type = "stata", fixed_effects = ~ id_edital, clusters = fieldID,
                                   data = W1_attrition_overall2)
attrition_answered_cov <- lm_robust(interviewed ~ treat_ind + formal_pre_prop + sexor + av_prel,
                                    se_type = "stata", fixed_effects = ~ id_edital, clusters = fieldID,
                                    data = W1_attrition2)

pol.attrition.joint.stats <- list(attrition_overall_covi, 
                                  attrition_answered_covi, 
                                  attrition_overall_cov, 
                                  attrition_answered_cov)

save(pol.attrition.joint.stats,file="Routputs/out-W1-attrition-joint.RData")


####################################################################################
# Analysis of Pooled Effects                                                  ######
# Estimation (3 estimators ITT-lin, ITT-lm, CACE-lm; 4 variants in each)      ######
# Summary tables (combining selected ITT and CACE)                            ######
####################################################################################

####  ITT-Lin: four variants  ####
#out.lin       & (sem idade e sem controles, com edital como covariate e survey weights) 
#out.linc.noage& (sem idade e com controles, com edital como covariate e survey weights) 
#out.lin.age   & (com idade, com edital como covariate e survey weights)
#out.linc      & (com idade e com controles, com edital como covariate e survey weights)
tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= c("id_edital")
                        ,clusters="fieldID"
                        ,w=1/surveyw1$sampling_prob
                        ,FUN = est_lin, the.data=surveyw1))
rownames(tmp) <- outcomes
out.lin <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lin$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lin[get(the.fam[ii]),"HBp"] <- p.adjust(out.lin[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= c("id_edital",pre.treat)
                        ,clusters="fieldID"
                        ,w=1/surveyw1$sampling_prob
                        ,FUN = est_lin, the.data=surveyw1))
rownames(tmp) <- outcomes
out.linc.noage <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.linc.noage$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.linc.noage[get(the.fam[ii]),"HBp"] <- p.adjust(out.linc.noage[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}  

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= c("id_edital","idade")
                        ,clusters="fieldID"
                        ,w=1/surveyw1$sampling_prob
                        ,FUN = est_lin, the.data=surveyw1))
rownames(tmp) <- outcomes
out.lin.age <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lin.age$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lin.age[get(the.fam[ii]),"HBp"] <- p.adjust(out.lin.age[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= c("id_edital","idade",pre.treat)
                        ,clusters="fieldID"
                        ,w=1/surveyw1$sampling_prob
                        ,FUN = est_lin, the.data=surveyw1))
rownames(tmp) <- outcomes
out.linc <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.linc$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.linc[get(the.fam[ii]),"HBp"] <- p.adjust(out.linc[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}

out.itt.lin <- list(out.lin=out.lin,out.linc.noage=out.linc.noage,
                    out.lin.age=out.lin.age,out.linc=out.linc)  

####  ITT-Lm: four variants  #### 
#out.lm        & (sem idade e sem controles, com edital como fixed effect e survey weights) 
#out.lmc.noage & (sem idade e com controles, com edital como fixed effect e survey weights) 
#out.lm.age    & (com idade, com edital como fixed effect e survey weights)
#out.lmc       & (com idade e com controles, com edital como fixed effect e survey weights)(*) 
tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= NULL
                        ,fe = "id_edital"
                        ,clusters="fieldID"
                        ,w=1/surveyw1$sampling_prob
                        ,FUN = est_lm, the.data=surveyw1))
rownames(tmp) <- outcomes
out.lm <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lm$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lm[get(the.fam[ii]),"HBp"] <- p.adjust(out.lm[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= c(pre.treat)
                        ,fe = "id_edital"
                        ,clusters="fieldID"
                        ,w=1/surveyw1$sampling_prob
                        ,FUN = est_lm, the.data=surveyw1))
rownames(tmp) <- outcomes
out.lmc.noage <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lmc.noage$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lmc.noage[get(the.fam[ii]),"HBp"] <- p.adjust(out.lmc.noage[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}


tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= "idade"
                        ,fe = "id_edital"
                        ,clusters="fieldID"
                        ,w=1/surveyw1$sampling_prob
                        ,FUN = est_lm, the.data=surveyw1))
rownames(tmp) <- outcomes
out.lm.age <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lm.age$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lm.age[get(the.fam[ii]),"HBp"] <- p.adjust(out.lm.age[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= c("idade",pre.treat)
                        ,fe = "id_edital"
                        ,clusters="fieldID"
                        ,w=1/surveyw1$sampling_prob
                        ,FUN = est_lm, the.data=surveyw1))
rownames(tmp) <- outcomes
out.lmc <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lmc$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lmc[get(the.fam[ii]),"HBp"] <- p.adjust(out.lmc[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}

out.itt.lm <- list(out.lm=out.lm,out.lmc.noage=out.lmc.noage,
                   out.lm.age=out.lm.age,out.lmc=out.lmc)  

####  CACE-Lm: four variants  #### 
#out.lmcace        & (sem idade e sem controles, com edital como fixed effect e survey weights) 
#out.lmcacec.noage & (sem idade e com controles, com edital como fixed effect e survey weights) 
#out.lmcace.age    & (com idade, com edital como fixed effect e survey weights)
#out.lmcacec       & (com idade e com controles, com edital como fixed effect e survey weights)(*)   

tmp <- bind_rows(lapply(1:length(outcomes),  outcomes=outcomes, treat="Z"
                        ,compliance="D_composite" 
                        ,the.data=surveyw1
                        ,fe="id_edital"
                        ,clusters="fieldID"
                        ,w=1/surveyw1$sampling_prob
                        ,FUN = est_cace)) 
rownames(tmp) <- outcomes
out.lmcace <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lmcace$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lmcace[get(the.fam[ii]),"HBp"] <- p.adjust(out.lmcace[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}

tmp <- bind_rows(lapply(1:length(outcomes),  outcomes=outcomes, treat="Z"
                        ,compliance="D_composite" 
                        ,the.data=surveyw1
                        ,ctrls=pre.treat
                        ,fe="id_edital"
                        ,clusters="fieldID"
                        ,w=1/surveyw1$sampling_prob
                        ,FUN = est_cace)) 
rownames(tmp) <- outcomes
out.lmcacec.noage <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lmcacec.noage$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lmcacec.noage[get(the.fam[ii]),"HBp"] <- p.adjust(out.lmcacec.noage[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}

tmp <- bind_rows(lapply(1:length(outcomes),  outcomes=outcomes, treat="Z"
                        ,compliance="D_composite" 
                        ,the.data=surveyw1
                        ,ctrls="idade"
                        ,fe="id_edital"
                        ,clusters="fieldID"
                        ,w=1/surveyw1$sampling_prob
                        ,FUN = est_cace)) 
rownames(tmp) <- outcomes
out.lmcace.age <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lmcace.age$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lmcace.age[get(the.fam[ii]),"HBp"] <- p.adjust(out.lmcace.age[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}  

tmp <- bind_rows(lapply(1:length(outcomes),  outcomes=outcomes, treat="Z"
                        ,compliance="D_composite" 
                        ,the.data=surveyw1
                        ,ctrls=c("idade",pre.treat)
                        ,fe="id_edital"
                        ,clusters="fieldID"
                        ,w=1/surveyw1$sampling_prob
                        ,FUN = est_cace)) 
rownames(tmp) <- outcomes
out.lmcacec <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lmcacec$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lmcacec[get(the.fam[ii]),"HBp"] <- p.adjust(out.lmcacec[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}    

out.cace.lm <- list(out.lmcace=out.lmcace,out.lmcacec.noage=out.lmcacec.noage,
                    out.lmcace.age=out.lmcace.age,out.lmcacec=out.lmcacec)  


### Collect all results (list of lists)
out.all <- list(out.itt.lm=out.itt.lm,out.itt.lin=out.itt.lin,out.cace.lm=out.cace.lm)
comment(out.all) <- paste("Estimated in ",Sys.Date(),". Three estimators, four variants in each.",sep="")
save(out.all,file="Routputs/out-W1-estimates-pooled.RData")

############################################################################################
# Moderator Time Waiting Since enrollment  (coded for all individuals)
############################################################################################

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,moderator="wait_time_all"
                        ,fe = "id_edital" 
                        ,ctrls= c("idade",pre.treat)
                        ,clusters="fieldID"
                        ,w=1/surveyw1$sampling_prob
                        ,FUN = reg_hetmod, the.data=surveyw1))
rownames(tmp) <- outcomes
out.waitenroll.lm <- as.data.frame(tmp)   

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,moderator="wait_time_all"
                        ,ctrls= c("idade",pre.treat, "id_edital")
                        ,clusters="fieldID"
                        ,w=1/surveyw1$sampling_prob
                        ,FUN = reg_hetmod_lin, the.data=surveyw1))
rownames(tmp) <- outcomes
out.waitenroll.lin <- as.data.frame(tmp)   

out.waitenroll <- list(out.waitenroll.lin=out.waitenroll.lin, waitenroll.lm=out.waitenroll.lm)  

save(out.waitenroll,file="Routputs/out-W1-estimates-pooled-hetwait.RData")

################## Testing Balance for Wait-times

surveyw1$w <- 1/surveyw1$sampling_prob

predictors <- texreg(lm_robust(wait_time_all~sexor + race_bin + religiao_dummy + 
              criancas_moram + sch_years + cadunico + formal + av_formalinc_log + idade, 
               data = surveyw1, clusters = fieldID, fixed_effects = ~id_edital,
               weights = w, se_type = "stata"))

pred_int <- texreg(lm_robust(wait_time_all~ Z + sexor + race_bin + religiao_dummy + 
            criancas_moram + sch_years + cadunico + formal + av_formalinc_log + idade +
            sexor*Z + race_bin*Z + religiao_dummy*Z + 
            criancas_moram*Z + sch_years*Z + cadunico*Z + formal*Z + av_formalinc_log*Z + idade*Z, 
             data = surveyw1, clusters = fieldID, fixed_effects = ~id_edital,
             weights = w, se_type = "stata"))

pred_wait <- list(predictors=predictors, pred_int=pred_int)  
save(pred_wait,file="Routputs/out-W1-pred-wait.RData")

#########Data cited in the paper

#number of respondents
length(unique(surveyw1$fieldID))

#Attribution PT
prop.table(table(surveyw1.unscaled$resp_lula_dilma_pt, surveyw1.unscaled$Z), 2)*100

#Attribution PMDB
prop.table(table(surveyw1.unscaled$resp_paes_pezao_temer, surveyw1.unscaled$Z), 2)*100


#Support Lula
prop.table(table(surveyw1.unscaled$lula_eval, surveyw1.unscaled$Z), 2)*100
mean(surveyw1.unscaled$lula_eval, na.rm = T)

#PT support
prop.table(table(surveyw1.unscaled$party_pt_r, surveyw1.unscaled$Z), 2)*100

#Paes support effect
load("Routputs/out-W1-estimates-pooled.RData")

out.all$out.itt.lin$out.linc #0.11
out.all$out.itt.lm$out.lmc #0.19
summary(surveyw1$paes_eval)
summary(surveyw1.unscaled$paes_eval)
0.11798/sd(surveyw1[which(surveyw1$Z == 0),]$paes_eval, na.rm = T)
0.19679/sd(surveyw1[which(surveyw1$Z == 0),]$paes_eval, na.rm = T)
surveyw1.unscaled %>% group_by(Z) %>% summarise(mean_e = mean(paes_eval, na.rm = T), 
                                                sd_e = sd(paes_eval, na.rm = T))

#########Data cited in the paper
summary(surveyw1$wait_time_all)
short <- surveyw1 %>% filter(wait_time_all <= 5.194) 
long <- surveyw1 %>% filter(wait_time_all > 5.194) 
hist(surveyw1$wait_time_all)
summary(short$wait_time_all)
summary(long$wait_time_all)

#Estimates Lula, PT, Dilma #lm
load("Routputs/out-W1-estimates-pooled-hetwait.RData")
out.waitenroll

prop.table(table(surveyw1$dream))*100

